package com.example.example;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

/**
 * Computes the top 10 product categories ranked by purchase count.
 *
 * <p>Reads purchase ("buy") behavior records from the Hive table
 * {@code dwd.dwd_behavior_info}, aggregates purchase counts per category,
 * and writes the top 10 result rows into the MySQL table
 * {@code top10_category} (overwriting any existing data).
 */
public class Top10Category {

    /**
     * Job entry point. Runs the full analysis pipeline:
     * <ol>
     *   <li>Create a Hive-enabled {@link SparkSession}.</li>
     *   <li>Query the top 10 categories by purchase count.</li>
     *   <li>Write the result to MySQL via JDBC (mode: overwrite).</li>
     *   <li>Close the session — guaranteed via {@code finally}, even when
     *       the query or the JDBC write throws.</li>
     * </ol>
     *
     * @param args command-line arguments (currently unused)
     */
    public static void main(String[] args) {

        SparkSession spark = SparkSession.builder().appName("Top10Category")
                .master("local[*]")
                .enableHiveSupport()
                .getOrCreate();

        // Ensure the session is always released: the original code skipped
        // spark.close() whenever the query or the write failed.
        try {
            Dataset<Row> ds = spark.sql("SELECT " +
                    "   category_id, count(1) as cnt " +
                    "FROM " +
                    "   dwd.dwd_behavior_info " +
                    "WHERE type='buy' " +
                    "GROUP BY category_id " +
                    "ORDER BY cnt DESC " +
                    "LIMIT 10");

            // NOTE(review): DB credentials are hard-coded in source; move them to
            // configuration (e.g. args, env vars, or a properties file) before
            // deploying outside a local/dev environment.
            ds.write()
                    .format("jdbc")
                    .option("url", "jdbc:mysql://hadoop:3306/ana_behavior_db")
                    .option("dbtable", "top10_category")
                    .option("user", "root")
                    .option("password", "123456")
                    .option("driver", "com.mysql.cj.jdbc.Driver")
                    .mode("overwrite")
                    .save();
        } finally {
            spark.close();
        }
    }
}
